library(raster) # raster handling (needed for relief)
## Loading required package: sp
library(viridis) # viridis color scale
## Loading required package: viridisLite
library(cowplot) # stack ggplots
library(colorspace) #nicer color scales
##
## Attaching package: 'colorspace'
## The following object is masked from 'package:raster':
##
## RGB
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ tidyr::extract() masks raster::extract()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::select() masks raster::select()
## ✖ lubridate::stamp() masks cowplot::stamp()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(rworldmap)
## ### Welcome to rworldmap ###
## For a short introduction type : vignette('rworldmap')
# set default figure parameters for knitr
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
wmap <- rworldmap::getMap(resolution = "low") %>%
st_as_sf()
ggplot(data = wmap) +
geom_sf(aes(fill = NAME)) +
theme_minimal() +
guides(fill = FALSE) # don't show legend
wmap_ant <- getMap()[-which(getMap()$ADMIN=='Antarctica'),] %>%
st_as_sf()
ggplot(data = wmap_ant) +
geom_sf(aes(fill = NAME)) +
theme_minimal() +
guides(fill = FALSE) # don't show legend
nc <- sf::st_read(system.file("shape/nc.shp", package = "sf"), quiet = TRUE)
ggplot(nc) +
geom_sf(aes(fill = AREA))
# If not supplied, coord_sf() will take the CRS from the first layer
# and automatically transform all other layers to use that CRS. This
# ensures that all data will correctly line up
nc_3857 <- sf::st_transform(nc, 3857)
ggplot() +
geom_sf(data = nc) +
geom_sf(data = nc_3857, colour = "red", fill = NA)
# Unfortunately if you plot other types of feature you'll need to use
# show.legend to tell ggplot2 what type of legend to use
nc_3857$mid <- sf::st_centroid(nc_3857$geometry)
ggplot(nc_3857) +
geom_sf(colour = "white") +
geom_sf(aes(geometry = mid, size = AREA), show.legend = "point")
# You can also use layers with x and y aesthetics. To have these interpreted
# as longitude/latitude you need to set the default CRS in coord_sf()
ggplot(nc_3857) +
geom_sf() +
annotate("point", x = -80, y = 35, colour = "red", size = 4) +
coord_sf(default_crs = sf::st_crs(4326))
# To add labels, use geom_sf_label().
ggplot(nc_3857[1:3, ]) +
geom_sf(aes(fill = AREA)) +
geom_sf_label(aes(label = NAME))
il_counties <- map_data("county", "illinois")
head(il_counties)
## long lat group order region subregion
## 1 -91.49563 40.21018 1 1 illinois adams
## 2 -90.91121 40.19299 1 2 illinois adams
## 3 -90.91121 40.19299 1 3 illinois adams
## 4 -90.91121 40.10704 1 4 illinois adams
## 5 -90.91121 39.83775 1 5 illinois adams
## 6 -90.91694 39.75754 1 6 illinois adams
ggplot(il_counties, aes(long, lat, group = group)) +
geom_polygon(fill = "white", colour = "grey50") +
coord_quickmap()
il_county <- tigris::counties(state = "illinois", cb = TRUE) %>%
st_as_sf()
##
|
| | 0%
|
| | 1%
|
|= | 1%
|
|= | 2%
|
|== | 2%
|
|== | 3%
|
|=== | 4%
|
|=== | 5%
|
|==== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 8%
|
|====== | 9%
|
|======= | 10%
|
|======= | 11%
|
|======== | 11%
|
|======== | 12%
|
|========= | 13%
|
|========== | 14%
|
|=========== | 15%
|
|=========== | 16%
|
|============ | 17%
|
|============= | 18%
|
|============= | 19%
|
|============== | 20%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 23%
|
|================= | 24%
|
|================= | 25%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 27%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 29%
|
|===================== | 30%
|
|===================== | 31%
|
|====================== | 32%
|
|======================= | 32%
|
|============================ | 40%
|
|============================ | 41%
|
|============================= | 41%
|
|=================================== | 50%
|
|============================================ | 64%
|
|====================================================== | 78%
|
|=========================================================== | 85%
|
|=================================================================== | 95%
|
|======================================================================| 100%
ggplot(il_county) +
geom_sf(color = "blue", fill = "lightgray")
Data source: https://geo-csv.com/illinois/
illinois <- read_csv("../data/illinois.csv")
ggplot(il_county) +
geom_sf(color = "blue", fill = "gray") +
geom_point(data = illinois,
aes(x = city_longitude, y = city_latitude),
colour = "darkblue", size = 0.1, alpha = 0.4) +
coord_sf()
# bring in county pop + cities
pop <- read_csv("../data/dceo_county_pop.csv")
# merge by county
pop_all <- pop %>% filter(`Age Group` == "All" & Race == "All") %>% mutate(NAME = `State/County`)
county_pop <- full_join(il_county, pop_all, by="NAME")
# plot
ggplot() + # set up basic plot (don't specify data b/c using two data sources)
geom_sf(data = county_pop, aes(fill = `2005`)) + # setup our county data
scale_fill_continuous_sequential(palette = "viridis", rev = FALSE) +
coord_sf()
#try breaks
ggplot() + # set up basic plot (don't specify data b/c using two data sources)
geom_sf(data = county_pop, aes(fill = `2005`)) + # setup our county data
scale_fill_binned_sequential(
palette = "viridis",
rev = FALSE,
aesthetics = c("fill", "color"),
n.breaks = 3,
) +
labs(fill="2005 population") +
coord_sf()
#try logging
ggplot() + # set up basic plot (don't specify data b/c using two data sources)
geom_sf(data = county_pop, aes(fill = `2005`)) + # setup our county data
scale_fill_continuous_sequential(palette = "viridis", rev = FALSE, trans = "log", ) +
labs(fill="2005 population") +
coord_sf()
# plot
ggplot() + # set up basic plot (don't specify data b/c using two data sources)
geom_sf(data = county_pop, aes(fill = `2005`)) + # setup our county data
scale_fill_continuous_sequential(palette = "viridis", rev = FALSE, trans = "log", ) +
labs(fill="2005 population") +
geom_point(data = illinois, # bringing in our city data
aes(x = city_longitude, y = city_latitude),
colour = "white", size = 0.01, alpha = 0.2) + # room to improve aesthetically
coord_sf()
ggsave("il_counties_cities.png")
Source: https://timogrossenbacher.ch/bivariate-maps-with-ggplot2-and-sf/ and https://timogrossenbacher.ch/bivariate-maps-with-ggplot2-and-sf/
I did not create any of this, just FYI! it’s a streamlined version from the source website and repo
# read cantonal borders
canton_geo <- read_sf("../data/input/g2k15.shp")
# read country borders
country_geo <- read_sf("../data/input/g2l15.shp")
# read lakes
lake_geo <- read_sf("../data/input/g2s15.shp")
# read productive area (2324 municipalities)
municipality_prod_geo <- read_sf("../data/input/gde-1-1-15.shp")
# read in raster of relief
relief <- raster("../data/input/02-relief-ascii.asc") %>%
# hide relief outside of Switzerland by masking with country borders
mask(country_geo) %>%
as("SpatialPixelsDataFrame") %>%
as.data.frame() %>%
rename(value = `X02.relief.ascii`)
# clean up
rm(country_geo)
data <- read_csv("../data/input/data.csv")
# create 3 buckets for gini
quantiles_gini <- data %>%
pull(gini) %>%
quantile(probs = seq(0, 1, length.out = 4))
# create 3 buckets for mean income
quantiles_mean <- data %>%
pull(mean) %>%
quantile(probs = seq(0, 1, length.out = 4))
Here the municipalities are put into the appropriate class corresponding to their average income and income (in-)equality.
# cut into groups defined above and join fill
municipality_prod_geo <- municipality_prod_geo %>%
left_join(data, by = c("BFS_ID" = "bfs_id"))
class(municipality_prod_geo)
## [1] "sf" "tbl_df" "tbl" "data.frame"
# pretyfiying labels
# define number of classes
no_classes <- 6
# extract quantiles
quantiles <- municipality_prod_geo %>%
pull(mean) %>%
quantile(probs = seq(0, 1, length.out = no_classes + 1)) %>%
as.vector() # to remove names of quantiles, so idx below is numeric
# here we create custom labels
labels <- imap_chr(quantiles, function(., idx){
return(paste0(round(quantiles[idx] / 1000, 0),
"k",
" – ",
round(quantiles[idx + 1] / 1000, 0),
"k"))
})
# we need to remove the last label
# because that would be something like "478k - NA"
labels <- labels[1:length(labels) - 1]
municipality_prod_geo <- municipality_prod_geo %>%
mutate(mean_quantiles = cut(mean,
breaks = quantiles,
labels = labels,
include.lowest = T))
ggplot(
# define main data source
data = municipality_prod_geo
) +
# raster comes as the first layer, municipalities on top
geom_raster(
data = relief, aes( x = x, y = y, alpha = value ) ) +
# use the "alpha hack"
scale_alpha(name = "", range = c(0.6, 0), guide = "none") +
geom_sf(
mapping = aes(
fill = mean_quantiles
),
color = "white",
size = 0.1
) +
# use the Viridis color scale
scale_fill_viridis(
option = "magma",
name = "Average\nincome in CHF",
alpha = 0.8, # make fill a bit brighter
begin = 0.1, # this option seems to be new (compared to 2016):
# with this we can truncate the
# color scale, so that extreme colors (very dark and very bright) are not
# used, which makes the map a bit more aesthetic
end = 0.9,
discrete = T, # discrete classes, thus guide_legend instead of _colorbar
direction = 1, # dark is lowest, yellow is highest
guide = guide_legend(
keyheight = unit(5, units = "mm"),
title.position = "top",
reverse = T # display highest income on top
)) +
geom_sf(
data = canton_geo,
fill = "transparent",
color = "white",
size = 0.5
)+
# draw lakes in light blue
geom_sf(
data = lake_geo,
fill = "#D6F1FF",
color = "transparent"
) +
# add titles
labs(x = NULL,
y = NULL,
title = "Switzerland's regional income (in-)equality",
subtitle = paste0("Average yearly income and income",
" (in-)equality in Swiss municipalities, 2015"))
# create color scale that encodes two variables
# red for gini and blue for mean income
# the special notation with gather is due to readibility reasons
bivariate_color_scale <- tibble(
"3 - 3" = "#3F2949", # high inequality, high income
"2 - 3" = "#435786",
"1 - 3" = "#4885C1", # low inequality, high income
"3 - 2" = "#77324C",
"2 - 2" = "#806A8A", # medium inequality, medium income
"1 - 2" = "#89A1C8",
"3 - 1" = "#AE3A4E", # high inequality, low income
"2 - 1" = "#BC7C8F",
"1 - 1" = "#CABED0" # low inequality, low income
) %>%
gather("group", "fill")
municipality_prod_geo <- municipality_prod_geo %>%
mutate(
gini_quantiles = cut(
gini,
breaks = quantiles_gini,
include.lowest = TRUE
),
mean_quantiles = cut(
mean,
breaks = quantiles_mean,
include.lowest = TRUE
),
# by pasting the factors together as numbers we match the groups defined
# in the tibble bivariate_color_scale
group = paste(
as.numeric(gini_quantiles), "-",
as.numeric(mean_quantiles)
)
) %>%
# we now join the actual hex values per "group"
# so each municipality knows its hex value based on the his gini and avg
# income value
left_join(bivariate_color_scale, by = "group")
map <- ggplot(
# use the same dataset as before
data = municipality_prod_geo
) +
# first: draw the relief
geom_raster(
data = relief,
aes(
x = x,
y = y,
alpha = value
)
) +
# use the "alpha hack" (as the "fill" aesthetic is already taken)
scale_alpha(name = "",
range = c(0.6, 0),
guide = F) + # suppress legend
# color municipalities according to their gini / income combination
geom_sf(
aes(
fill = fill
),
# use thin white stroke for municipalities
color = "white",
size = 0.1
) +
# as the sf object municipality_prod_geo has a column with name "fill" that
# contains the literal color as hex code for each municipality, we can use
# scale_fill_identity here
scale_fill_identity() +
# use thicker white stroke for cantons
geom_sf(
data = canton_geo,
fill = "transparent",
color = "white",
size = 0.5
) +
# draw lakes in light blue
geom_sf(
data = lake_geo,
fill = "#D6F1FF",
color = "transparent"
) +
# add titles
labs(x = NULL,
y = NULL,
title = "Switzerland's regional income (in-)equality",
subtitle = paste0("Average yearly income and income",
" (in-)equality in Swiss municipalities, 2015")) +
# add the theme
theme_map()
# separate the groups
bivariate_color_scale <- bivariate_color_scale %>%
separate(group, into = c("gini", "mean"), sep = " - ") %>%
mutate(gini = as.integer(gini),
mean = as.integer(mean))
legend <- ggplot() +
geom_tile(
data = bivariate_color_scale,
mapping = aes(
x = gini,
y = mean,
fill = fill)
) +
scale_fill_identity() +
labs(x = "Higher inequality",
y = "Higher income") +
theme_map() +
# make font small enough
theme(
axis.title = element_text(size = 6)
) +
# quadratic tiles
coord_fixed()
ggdraw() +
draw_plot(map, 0, 0, 1, 1) +
draw_plot(legend, 0.05, 0.075, 0.2, 0.2)
annotations <- tibble(
label = c(
"Grey areas mean\nlow income and\nlow inequality",
"Blue areas mean\nhigh income and\nlow inequality",
"Violet areas mean\nhigh income and\nhigh inequality",
"Red areas mean\nlow income and\nhigh inequality"
),
arrow_from = c(
"548921,232972", # grey
"771356,238335", # blue
"781136,125067", # violet
"616348,81869" # red
),
arrow_to = c(
"622435,206784", # grey
"712671,261998", # blue
"786229,149597", # violet
"602334,122674" # red
),
curvature = c(
0.2, # grey
0.1, # blue
-0.1, # violet
-0.2 # red
),
nudge = c(
"-3000,0", # grey
"3000,5000", # blue
"0,-5000", # violet
"3000,0" # red
),
just = c(
"1,0", # grey
"0,1", # blue
"0.5,1", # violet
"0,1" # red
)
) %>%
separate(arrow_from, into = c("x", "y")) %>%
separate(arrow_to, into = c("xend", "yend")) %>%
separate(nudge, into = c("nudge_x", "nudge_y"), sep = "\\,") %>%
separate(just, into = c("hjust", "vjust"), sep = "\\,")
map_a <- ggplot(
# use the same dataset as before
data = municipality_prod_geo
) +
# first: draw the relief
geom_raster(
data = relief,
aes(
x = x,
y = y,
alpha = value
)
) +
# use the "alpha hack" (as the "fill" aesthetic is already taken)
scale_alpha(name = "",
range = c(0.6, 0),
guide = F) + # suppress legend
# color municipalities according to their gini / income combination
geom_sf(
aes(
fill = fill
),
# use thin white stroke for municipalities
color = "white",
size = 0.1
) +
# as the sf object municipality_prod_geo has a column with name "fill" that
# contains the literal color as hex code for each municipality, we can use
# scale_fill_identity here
scale_fill_identity() +
# use thicker white stroke for cantons
geom_sf(
data = canton_geo,
fill = "transparent",
color = "white",
size = 0.5
) +
# draw lakes in light blue
geom_sf(
data = lake_geo,
fill = "#D6F1FF",
color = "transparent"
) +
# add titles
labs(x = NULL,
y = NULL,
title = "Switzerland's regional income (in-)equality",
subtitle = paste0("Average yearly income and income",
" (in-)equality in Swiss municipalities, 2015")) +
# add the theme
theme_map()
# add annotations one by one by walking over the annotations data frame
# this is necessary because we cannot define nudge_x, nudge_y and curvature
# in the aes in a data-driven way like as with x and y, for example
annotations %>%
pwalk(function(...) {
# collect all values in the row in a one-rowed data frame
current <- tibble(...)
# convert all columns from x to vjust to numeric
# as pwalk apparently turns everything into a character (why???)
current <- current %>%
mutate_at(vars(x:vjust), as.numeric)
# update the plot object with global assignment
map_a <<- map_a +
# for each annotation, add an arrow
geom_curve(
data = current,
aes(
x = x,
xend = xend,
y = y,
yend = yend
),
# that's the whole point of doing this loop:
curvature = current %>% pull(curvature),
size = 0.2,
arrow = arrow(
length = unit(0.005, "npc")
)
) +
# for each annotation, add a label
geom_text(
data = current,
aes(
x = x,
y = y,
label = label,
hjust = hjust,
vjust = vjust
),
# that's the whole point of doing this loop:
nudge_x = current %>% pull(nudge_x),
nudge_y = current %>% pull(nudge_y),
color = "black",
size = 3
)
})
ggdraw() +
draw_plot(map_a, 0, 0, 1, 1) +
draw_plot(legend, 0.05, 0.075, 0.2, 0.2)
ggsave("switzerland.png")